zinbModelFromW <- function(counts, sim_W){
mod <- model.matrix(~ sim_W)
zinb_sim <- zinbFit(counts, ncores = 3, X=mod, which_X_mu=1:3, which_X_pi=1:3,
commondispersion=FALSE)
true_alpha_mu <- zinb_sim@beta_mu[-1,]
true_alpha_pi <- zinb_sim@beta_pi[-1,]
true_beta_mu <- zinb_sim@beta_mu[1,,drop=FALSE]
true_beta_pi <- zinb_sim@beta_pi[1,,drop=FALSE]
true_gamma_mu <- zinb_sim@gamma_mu
true_gamma_pi <- zinb_sim@gamma_pi
true_zeta <- zinb_sim@zeta
zinbModel(W=sim_W, alpha_mu=true_alpha_mu, alpha_pi=true_alpha_pi,
beta_mu=true_beta_mu, beta_pi=true_beta_pi,
zeta = true_zeta, gamma_mu=true_gamma_mu,
gamma_pi=true_gamma_pi)
}
simSummary = data.frame(realData = rep('Allen', 3),
K = rep(2, 3),
nclusters = rep(3, 3),
corMuPi = rep('no', 3),
dim = rep('1000x100', 3),
zeroFract = c(.25, .5, .75),
nreplicates = 10)
kable(simSummary)
| realData | K | nclusters | corMuPi | dim | zeroFract | nreplicates |
|---|---|---|---|---|---|---|
| Allen | 2 | 3 | no | 1000x100 | 0.25 | 10 |
| Allen | 2 | 3 | no | 1000x100 | 0.50 | 10 |
| Allen | 2 | 3 | no | 1000x100 | 0.75 | 10 |
To simulate a dataset, steps are as follow
Using this procedure, we have the simulated counts and true \(W\), \(\gamma_{\mu}\), \(\gamma_{\pi}\), \(\alpha_{\mu}\), \(\alpha_{\pi}\), \(\beta_{\mu}\), \(\beta_{\pi}\), and \(\zeta\).
To simulate the three datasets from Allen real dataset, the same 100 cells were used.
data("allen")
prefilter <- allen[grep("^ERCC-", rownames(allen), invert = TRUE),
which(colData(allen)$Core.Type=="Core")]
sampleCells <- sample(ncol(prefilter), 200)
postfilter <- prefilter[, sampleCells]
filterGenes <- apply(assay(postfilter) > 5, 1, sum) >= 5
postfilter <- postfilter[filterGenes, ]
bio = bioIni = as.factor(colData(postfilter)$driver_1_s)
cl <- as.factor(colData(postfilter)$Primary.Type)
postfilter <- assay(postfilter)
vars <- rowVars(log1p(postfilter))
names(vars) <- rownames(postfilter)
vars <- sort(vars, decreasing = TRUE)
pZero <- rowMeans(log1p(postfilter) == 0)
names(pZero) <- rownames(postfilter)
pZero = pZero[names(vars)]
chosenGenes = c(names(pZero)[pZero > 0.75][1:100],
names(pZero)[pZero<0.75 & pZero>0.6][1:400],
names(pZero)[pZero<0.6 & pZero>0.2][1:400],
names(pZero)[pZero<0.2][1:100])
core <- postfilter[chosenGenes, ]
#sum(core == 0)/(nrow(core) * ncol(core))
The dimensions are
dim(core)
## [1] 1000 200
par(mfrow = c(1,2))
fq <- betweenLaneNormalization(core, which="full")
pca <- prcomp(t(log1p(fq)))
plot(pca$x, col=cols[bio], pch=19, main="PCA of log-counts, 200 cells")
#legend("bottomleft", levels(bio), fill=cols)
plot(pca$x, col=cols2[cl], pch=19, main="PCA of log-counts, 200 cells")
Fit data with K = 2, V and X intercepts in both Mu and Pi, commondispersion = FALSE, and no covariate.
print(system.time(zinb <- zinbFit(core, ncores = 3, K = 2,
commondispersion = FALSE)))
## user system elapsed
## 367.619 217.622 231.034
100 cells are selected. Now, the dimensions are
estW = zinb@W
filt1 = estW[,1] > -1 & bio == 'Rbp4-Cre_KL100'
estW = estW[!filt1, ]
bio = bio[!filt1]
core = core[,!filt1]
filt2 = estW[,2] < 0 & bio == 'Scnn1a-Tg3-Cre'
estW = estW[!filt2, ]
bio = bio[!filt2]
core = core[,!filt2]
chosen = sample(nrow(estW), 100, replace = F)
bio = bio[chosen]
estW = estW[chosen, ]
core5 = core[,chosen]
dim(core5)
## [1] 1000 100
chosenCells = colnames(core5)
detection_rate <- colSums(core5>0)
coverage <- colSums(core5)
print(system.time(zinb5 <- zinbFit(core5, ncores = 3, K = 2,
commondispersion = FALSE)))
## user system elapsed
## 243.519 130.365 154.869
par(mfrow = c(1,3))
plot(zinb@W, col=cols[bioIni], pch=19, xlab="W1", ylab="W2",
main="Fitted W, 200 cells")
plot(estW, col=cols[bio], pch=19, xlab="W1", ylab="W2",
main="Fitted W, 100 cells")
plot(zinb5@W, col=cols[bio], pch=19, xlab="W1", ylab="W2",
main="Fitted W, 100 cells, refit")
zinb = zinb5
W = data.frame(W1 = zinb@W[,1], W2 = zinb@W[,2], bio = bio)
ggplot(melt(W), aes(value, fill = bio)) + geom_density(alpha = 0.2) +
facet_grid(~ variable) + xlab('fitted W') + theme_bw()
W simulated with \[W_1^{Scnn1a} \sim \mathcal{N}(1, .3)\] \[W_2^{Scnn1a} \sim \mathcal{N}(3, .3)\] \[W_1^{Rbp4} \sim \mathcal{N}(2, .3)\] \[W_2^{Rbp4} \sim \mathcal{N}(-3, .3)\] \[W_1^{Ntsr1} \sim \mathcal{N}(-4, .4)\] \[W_2^{Ntsr1} \sim \mathcal{N}(0, .4)\] TODO: change numeric values.
Sc = grepl('^Scnn1a', W$bio)
Rb = grepl('^Rbp4', W$bio)
Nt = grepl('^Ntsr1', W$bio)
W[Sc, 'W1sim'] = rnorm(sum(Sc), 1, .3)
W[Sc, 'W2sim'] = rnorm(sum(Sc), 3, .3)
W[Rb, 'W1sim'] = rnorm(sum(Rb), 2, .3)
W[Rb, 'W2sim'] = rnorm(sum(Rb), -3, .3)
W[Nt, 'W1sim'] = rnorm(sum(Nt), -4, .4)
W[Nt, 'W2sim'] = rnorm(sum(Nt), 0, .4)
par(mfrow=c(1, 2))
plot(zinb@W, col=cols[bio], pch=19, xlab="W1", ylab="W2",
main="Fitted")
plot(W[,c('W1sim', 'W2sim')], col=cols[bio], pch=19, xlab="W1", ylab="W2",
main="Simulated")
sim_W <- as.matrix(W[, c('W1sim', 'W2sim')])
simModel = simModel5 = zinbModelFromW(core5, sim_W)
simData = simData5 = zinbSim(simModel, seed = 8746)
df <- data.frame(gamma_mu = simModel@gamma_mu[1, ],
gamma_pi = simModel@gamma_pi[1, ],
detection_rate = detection_rate,
coverage = coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## gamma_mu gamma_pi detection_rate coverage
## gamma_mu 1.0000000 -0.2777558 0.3361947 0.8805161
## gamma_pi -0.2777558 1.0000000 -0.9845389 -0.5059706
## detection_rate 0.3361947 -0.9845389 1.0000000 0.5370054
## coverage 0.8805161 -0.5059706 0.5370054 1.0000000
df <- data.frame(alpha_mu_1 = simModel@alpha_mu[1, ],
alpha_mu_2 = simModel@alpha_mu[2, ],
alpha_pi_1 = simModel@alpha_pi[1, ],
alpha_pi_2 = simModel@alpha_pi[2, ])
pairs(df, pch=19, main = 'alpha')
print(cor(df, method="spearman"))
## alpha_mu_1 alpha_mu_2 alpha_pi_1 alpha_pi_2
## alpha_mu_1 1.00000000 0.1434140 -0.28012445 -0.03256204
## alpha_mu_2 0.14341402 1.0000000 -0.12087510 -0.39915616
## alpha_pi_1 -0.28012445 -0.1208751 1.00000000 0.03098031
## alpha_pi_2 -0.03256204 -0.3991562 0.03098031 1.00000000
df <- data.frame(beta_mu = simModel@beta_mu[1, ],
beta_pi = simModel@beta_pi[1, ])
pairs(df, pch=19, main = 'Beta')
print(cor(df, method="spearman"))
## beta_mu beta_pi
## beta_mu 1.0000000 -0.4648914
## beta_pi -0.4648914 1.0000000
chosenGenes = c(names(pZero)[pZero > 0.4][1:50],
names(pZero)[pZero<0.4 & pZero>0.2][1:550],
names(pZero)[pZero<0.2][1:400])
core25 <- postfilter[chosenGenes, chosenCells]
sum(core25 == 0)/(nrow(core25) * ncol(core25))
## [1] 0.25206
print(system.time(zinb25 <- zinbFit(core25, ncores = 3, K = 2,
commondispersion = FALSE)))
## user system elapsed
## 236.609 130.312 152.826
zinb = zinb25
W = data.frame(W1 = zinb@W[,1], W2 = zinb@W[,2], bio = bio)
ggplot(melt(W), aes(value, fill = bio)) + geom_density(alpha = 0.2) +
facet_grid(~ variable) + xlab('fitted W') + theme_bw()
W simulated with \[W_1^{Scnn1a} \sim \mathcal{N}(1.5, .3)\] \[W_2^{Scnn1a} \sim \mathcal{N}(2, .3)\] \[W_1^{Rbp4} \sim \mathcal{N}(1, .3)\] \[W_2^{Rbp4} \sim \mathcal{N}(-3, .3)\] \[W_1^{Ntsr1} \sim \mathcal{N}(-3, .3)\] \[W_2^{Ntsr1} \sim \mathcal{N}(0, .3)\]
Sc = grepl('^Scnn1a', W$bio)
Rb = grepl('^Rbp4', W$bio)
Nt = grepl('^Ntsr1', W$bio)
W[Sc, 'W1sim'] = rnorm(sum(Sc), 1.5, .3)
W[Sc, 'W2sim'] = rnorm(sum(Sc), 2, .3)
W[Rb, 'W1sim'] = rnorm(sum(Rb), 1, .3)
W[Rb, 'W2sim'] = rnorm(sum(Rb), -3, .3)
W[Nt, 'W1sim'] = rnorm(sum(Nt), -3, .3)
W[Nt, 'W2sim'] = rnorm(sum(Nt), 0, .3)
par(mfrow=c(1, 2))
plot(zinb@W, col=cols[bio], pch=19, xlab="W1", ylab="W2",
main="Fitted", xlim = c(-5, 3), ylim = c(-4, 3))
plot(W[,c('W1sim', 'W2sim')], col=cols[bio], pch=19, xlab="W1", ylab="W2",
main="Simulated", xlim = c(-5, 3), ylim = c(-4, 3))
sim_W <- as.matrix(W[, c('W1sim', 'W2sim')])
simModel = simModel25 = zinbModelFromW(core25, sim_W)
simData = simData25 = zinbSim(simModel, seed = 8746)
detection_rate <- colSums(core25>0)
coverage <- colSums(core25)
df <- data.frame(gamma_mu = simModel@gamma_mu[1, ],
gamma_pi = simModel@gamma_pi[1, ],
detection_rate = detection_rate,
coverage = coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## gamma_mu gamma_pi detection_rate coverage
## gamma_mu 1.0000000 -0.3801500 0.4191412 0.8914011
## gamma_pi -0.3801500 1.0000000 -0.9886676 -0.5168317
## detection_rate 0.4191412 -0.9886676 1.0000000 0.5474675
## coverage 0.8914011 -0.5168317 0.5474675 1.0000000
df <- data.frame(alpha_mu_1 = simModel@alpha_mu[1, ],
alpha_mu_2 = simModel@alpha_mu[2, ],
alpha_pi_1 = simModel@alpha_pi[1, ],
alpha_pi_2 = simModel@alpha_pi[2, ])
pairs(df, pch=19, main = 'alpha')
print(cor(df, method="spearman"))
## alpha_mu_1 alpha_mu_2 alpha_pi_1 alpha_pi_2
## alpha_mu_1 1.00000000 -0.03073397 -0.37139808 0.02845402
## alpha_mu_2 -0.03073397 1.00000000 -0.04157398 -0.29780065
## alpha_pi_1 -0.37139808 -0.04157398 1.00000000 -0.22350240
## alpha_pi_2 0.02845402 -0.29780065 -0.22350240 1.00000000
df <- data.frame(beta_mu = simModel@beta_mu[1, ],
beta_pi = simModel@beta_pi[1, ])
pairs(df, pch=19, main = 'Beta')
print(cor(df, method="spearman"))
## beta_mu beta_pi
## beta_mu 1.00000000 -0.08039971
## beta_pi -0.08039971 1.00000000
chosenGenes = c(names(pZero)[pZero > 0.8][1:800],
names(pZero)[pZero < 0.8 & pZero > 0.3][1:100],
names(pZero)[pZero < 0.3][1:100])
core75 <- postfilter[chosenGenes, chosenCells]
sum(core75 == 0)/(nrow(core75) * ncol(core75))
## [1] 0.74562
print(system.time(zinb75 <- zinbFit(core75, ncores = 3, K = 2,
maxiter.optimize = 100,
commondispersion = FALSE)))
## Warning in simpute.als(x, J, thresh, lambda, maxit, trace.it, warm.start, :
## Convergence not achieved by 100 iterations
## Warning in optimize(f = zinb.loglik.dispersion, Y = as.matrix(Y), mu =
## mu, : NA/Inf replaced by maximum positive value
## Warning in optimize(f = zinb.loglik.dispersion, Y = as.matrix(Y), mu =
## mu, : NA/Inf replaced by maximum positive value
## Warning in optimize(f = zinb.loglik.dispersion, Y = as.matrix(Y), mu =
## mu, : NA/Inf replaced by maximum positive value
## Warning in optimize(f = zinb.loglik.dispersion, Y = as.matrix(Y), mu =
## mu, : NA/Inf replaced by maximum positive value
## Warning in optimize(f = zinb.loglik.dispersion, Y = as.matrix(Y), mu =
## mu, : NA/Inf replaced by maximum positive value
## Warning in optimize(f = zinb.loglik.dispersion, Y = as.matrix(Y), mu =
## mu, : NA/Inf replaced by maximum positive value
## Warning in optimize(f = zinb.loglik.dispersion, Y = as.matrix(Y), mu =
## mu, : NA/Inf replaced by maximum positive value
## user system elapsed
## 836.387 540.840 569.358
zinb = zinb75
W = data.frame(W1 = zinb@W[,1], W2 = zinb@W[,2], bio = bio)
ggplot(melt(W), aes(value, fill = bio)) + geom_density(alpha = 0.2) +
facet_grid(~ variable) + xlab('fitted W') + theme_bw()
W simulated with \[W_1^{Scnn1a} \sim \mathcal{N}(.5, .3)\] \[W_2^{Scnn1a} \sim \mathcal{N}(2, .3)\] \[W_1^{Rbp4} \sim \mathcal{N}(-4, .3)\] \[W_2^{Rbp4} \sim \mathcal{N}(-1, .3)\] \[W_1^{Ntsr1} \sim \mathcal{N}(2, .3)\] \[W_2^{Ntsr1} \sim \mathcal{N}(-3, .3)\] TODO: change numeric values.
Sc = grepl('^Scnn1a', W$bio)
Rb = grepl('^Rbp4', W$bio)
Nt = grepl('^Ntsr1', W$bio)
W[Sc, 'W1sim'] = rnorm(sum(Sc), 5, 1)
W[Sc, 'W2sim'] = rnorm(sum(Sc), 2.5, 1.5)
W[Rb, 'W1sim'] = rnorm(sum(Rb), -2.5, 1)
W[Rb, 'W2sim'] = rnorm(sum(Rb), -7.5, 1)
W[Nt, 'W1sim'] = rnorm(sum(Nt), -7.5, 1)
W[Nt, 'W2sim'] = rnorm(sum(Nt), 5, 2)
par(mfrow=c(1, 2))
plot(zinb@W, col=cols[bio], pch=19, xlab="W1", ylab="W2",
main="Fitted", ylim = c(-10, 10), xlim = c(-15, 12))
plot(W[,c('W1sim', 'W2sim')], col=cols[bio], pch=19, xlab="W1", ylab="W2",
main="Simulated", ylim = c(-10, 10), xlim = c(-15, 12))
sim_W <- as.matrix(W[, c('W1sim', 'W2sim')])
simModel = simModel75 = zinbModelFromW(core75, sim_W)
simData = simData75 = zinbSim(simModel, seed = 8746)
detection_rate <- colSums(core75>0)
coverage <- colSums(core75)
df <- data.frame(gamma_mu = simModel@gamma_mu[1, ],
gamma_pi = simModel@gamma_pi[1, ],
detection_rate = detection_rate,
coverage = coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## gamma_mu gamma_pi detection_rate coverage
## gamma_mu 1.0000000 -0.2133453 0.3033864 0.8667627
## gamma_pi -0.2133453 1.0000000 -0.9844274 -0.5080228
## detection_rate 0.3033864 -0.9844274 1.0000000 0.5622813
## coverage 0.8667627 -0.5080228 0.5622813 1.0000000
df <- data.frame(alpha_mu_1 = simModel@alpha_mu[1, ],
alpha_mu_2 = simModel@alpha_mu[2, ],
alpha_pi_1 = simModel@alpha_pi[1, ],
alpha_pi_2 = simModel@alpha_pi[2, ])
pairs(df, pch=19, main = 'alpha')
print(cor(df, method="spearman"))
## alpha_mu_1 alpha_mu_2 alpha_pi_1 alpha_pi_2
## alpha_mu_1 1.000000000 0.02409092 0.008606553 -0.01618534
## alpha_mu_2 0.024090924 1.00000000 -0.043964108 0.12142476
## alpha_pi_1 0.008606553 -0.04396411 1.000000000 0.01038956
## alpha_pi_2 -0.016185340 0.12142476 0.010389562 1.00000000
df <- data.frame(beta_mu = simModel@beta_mu[1, ],
beta_pi = simModel@beta_pi[1, ])
pairs(df, pch=19, main = 'Beta')
print(cor(df, method="spearman"))
## beta_mu beta_pi
## beta_mu 1.00000000 -0.05301733
## beta_pi -0.05301733 1.00000000
core = core25
zinb = zinb25
simData = simData25
simModel = simModel25
obs_detection_rate <- colMeans(core>0)
obs_coverage <- colSums(core)
sim_detection_rate <- rowMeans(simData$counts>0)
sim_coverage <- rowSums(simData$counts)
par(mfrow=c(2, 2))
plot(rowMeans(getPi(zinb)), obs_detection_rate,
xlab="Average fitted Pi", ylab="Detection Rate",
pch=19, col=cols[bio], ylim = c(0, 1), xlim = c(0,1))
plot(rowMeans(log1p(getMu(zinb))), log1p(obs_coverage), xlim = c(5, 8),
xlab="Average fitted log Mu", ylab="log Coverage", pch=19, ylim = c(11,15),
col=cols[bio])
plot(rowMeans(getPi(simModel)), sim_detection_rate,
xlab="Average simulated Pi", ylab="True Detection Rate",
pch=19, col=cols[bio], ylim = c(0, 1), xlim = c(0,1))
plot(rowMeans(log1p(getMu(simModel))), log1p(sim_coverage), xlim = c(5, 8),
xlab="Average simulated log Mu", ylab="log Coverage", pch=19,
col=cols[bio],ylim = c(11,15))
pal =colorRampPalette(c("black","black", "red","yellow"), space="rgb")
mean = apply(log1p(core), 1, function(x) mean(x[x!=0]))
par(mfrow=c(1, 2))
smoothScatter(mean, rowMeans(core == 0),xlim = c(3,8),
nrpoints=Inf, pch = "", cex = .7, ylim = c(0,1),
xlab = "Mean Expression", ylab = "Dropout Rate",
colramp = pal, main = 'Observed')
smoothScatter(colMeans(log1p(getMu(simModel))), colMeans(getPi(simModel)), nbin = 256,
nrpoints=Inf, pch="", cex=.7, xlim = c(3,8),
xlab = "Mean Expression", main = 'Simulated',
ylab = "Dropout Rate",ylim = c(0,1),
colramp = pal)
core = core5
zinb = zinb5
simData = simData5
simModel = simModel5
obs_detection_rate <- colMeans(core>0)
obs_coverage <- colSums(core)
sim_detection_rate <- rowMeans(simData$counts>0)
sim_coverage <- rowSums(simData$counts)
par(mfrow=c(2, 2))
plot(rowMeans(getPi(zinb)), obs_detection_rate,
xlab="Average fitted Pi", ylab="Detection Rate",
pch=19, col=cols[bio], ylim = c(0, 1), xlim = c(0,1))
plot(rowMeans(log1p(getMu(zinb))), log1p(obs_coverage), xlim = c(4, 7),
xlab="Average fitted log Mu", ylab="log Coverage", pch=19, ylim = c(9,14),
col=cols[bio])
plot(rowMeans(getPi(simModel)), sim_detection_rate,
xlab="Average simulated Pi", ylab="True Detection Rate",
pch=19, col=cols[bio], ylim = c(0, 1), xlim = c(0,1))
plot(rowMeans(log1p(getMu(simModel))), log1p(sim_coverage), xlim = c(4, 7),
xlab="Average simulated log Mu", ylab="log Coverage", pch=19,
col=cols[bio],ylim = c(9,14))
pal =colorRampPalette(c("black","black", "red","yellow"), space="rgb")
mean = apply(log1p(core), 1, function(x) mean(x[x!=0]))
par(mfrow=c(1, 2))
smoothScatter(mean, rowMeans(core == 0),xlim = c(3,8),
nrpoints=Inf, pch = "", cex = .7, ylim = c(0,1),
xlab = "Mean Expression", ylab = "Dropout Rate",
colramp = pal, main = 'Observed')
smoothScatter(colMeans(log1p(getMu(simModel))), colMeans(getPi(simModel)), nbin = 256,
nrpoints=Inf, pch="", cex=.7, xlim = c(3,8),
xlab = "Mean Expression", main = 'Simulated',
ylab = "Dropout Rate",ylim = c(0,1),
colramp = pal)
core = core75
zinb = zinb75
simData = simData75
simModel = simModel75
obs_detection_rate <- colMeans(core>0)
obs_coverage <- colSums(core)
sim_detection_rate <- rowMeans(simData$counts>0)
sim_coverage <- rowSums(simData$counts)
par(mfrow=c(2, 2))
plot(rowMeans(getPi(zinb)), obs_detection_rate,
xlab="Average fitted Pi", ylab="Detection Rate",
pch=19, col=cols[bio], ylim = c(0, 1), xlim = c(0,1))
plot(rowMeans(log1p(getMu(zinb))), log1p(obs_coverage), xlim = c(3, 7),
xlab="Average fitted log Mu", ylab="log Coverage", pch=19, ylim = c(10,14),
col=cols[bio])
plot(rowMeans(getPi(simModel)), sim_detection_rate,
xlab="Average simulated Pi", ylab="True Detection Rate",
pch=19, col=cols[bio], ylim = c(0, 1), xlim = c(0,1))
plot(rowMeans(log1p(getMu(simModel))), log1p(sim_coverage), xlim = c(3, 7),
xlab="Average simulated log Mu", ylab="log Coverage", pch=19,
col=cols[bio],ylim = c(10,14))
pal =colorRampPalette(c("black","black", "red","yellow"), space="rgb")
mean = apply(log1p(core), 1, function(x) mean(x[x!=0]))
par(mfrow=c(1, 2))
smoothScatter(mean, rowMeans(core == 0),xlim = c(3,8),
nrpoints=Inf, pch = "", cex = .7, ylim = c(0,1),
xlab = "Mean Expression", ylab = "Dropout Rate",
colramp = pal, main = 'Observed')
smoothScatter(colMeans(log1p(getMu(simModel))), colMeans(getPi(simModel)), nbin = 256,
nrpoints=Inf, pch="", cex=.7, xlim = c(3,8),
xlab = "Mean Expression", main = 'Simulated',
ylab = "Dropout Rate",ylim = c(0,1),
colramp = pal)
B = 10
bio = as.vector(bio)
originalCounts = core5
simModel = simModel5
simData = lapply(seq_len(B), function(i) zinbSim(simModel, seed=i))
save(originalCounts, bio, simModel, simData, file = "./datasets/sim_allen5.rda")
zero5 = sapply(simData, function(x) x$zeroFraction)
originalCounts = core25
simModel = simModel25
simData = lapply(seq_len(B), function(i) zinbSim(simModel, seed=i))
save(originalCounts, bio, simModel, simData, file = "./datasets/sim_allen25.rda")
zero25 = sapply(simData, function(x) x$zeroFraction)
originalCounts = core75
simModel = simModel75
simData = lapply(seq_len(B), function(i) zinbSim(simModel, seed=i))
save(originalCounts, bio, simModel, simData, file = "./datasets/sim_allen75.rda")
zero75 = sapply(simData, function(x) x$zeroFraction)
zeroFrac = data.frame(zero = c(zero5, zero25, zero75),
perc = c(rep(.5, 10), rep(.25, 10), rep(.75, 10)))
zeroFrac$perc = factor(zeroFrac$perc)
zeroFrac = melt(zeroFrac)
ggplot(zeroFrac, aes(x = perc, y = value)) + geom_boxplot() +
ylab('zero fraction') + xlab('wanted zero fraction') + ggtitle('one boxplot = 10 replicates')